a4e898
@@ -44,9 +44,9 @@
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
-import org.apache.hadoop.hive.ql.metadata.InputEstimator;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
+import org.apache.hadoop.hive.ql.metadata.InputEstimator;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
@@ -55,13 +55,25 @@
 import org.apache.hadoop.hive.ql.parse.QB;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.parse.SplitSample;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.plan.ListSinkDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDate;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUtcTimestamp;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToVarchar;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.JobConf;
 
@@ -73,9 +85,11 @@
 
   private final Log LOG = LogFactory.getLog(SimpleFetchOptimizer.class.getName());
 
+  @Override
   public ParseContext transform(ParseContext pctx) throws SemanticException {
     Map<String, Operator<? extends OperatorDesc>> topOps = pctx.getTopOps();
-    if (pctx.getQB().isSimpleSelectQuery() && topOps.size() == 1) {
+    if (pctx.getQB().getIsQuery() && !pctx.getQB().getParseInfo().isAnalyzeCommand()
+        && topOps.size() == 1) {
       // no join, no groupby, no distinct, no lateral view, no subq,
       // no CTAS or insert, not analyze command, and single sourced.
       String alias = (String) pctx.getTopOps().keySet().toArray()[0];
@@ -144,7 +158,7 @@
private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) thr
   // for non-aggressive mode (minimal)
   // 1. samping is not allowed
   // 2. for partitioned table, all filters should be targeted to partition column
-  // 3. SelectOperator should be select star
+  // 3. SelectOperator should use only simple cast/column access
   private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias,
       TableScanOperator ts) throws HiveException {
     SplitSample splitSample = pctx.getNameToSplitSample().get(alias);
@@ -156,7 +170,7 @@
private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias,
       return null;
     }
 
-    Table table = qb.getMetaData().getAliasToTable().get(alias);
+    Table table = pctx.getTopToTable().get(ts);
     if (table == null) {
       return null;
     }
@@ -181,34 +195,71 @@
private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias,
     return null;
   }
 
-  private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean aggresive,
+  private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean aggressive,
       boolean bypassFilter) {
     if (ts.getChildOperators().size() != 1) {
       return null;
     }
     Operator<?> op = ts.getChildOperators().get(0);
     for (; ; op = op.getChildOperators().get(0)) {
-      if (aggresive) {
-        if (!(op instanceof LimitOperator || op instanceof FilterOperator
-            || op instanceof SelectOperator)) {
+      if (op instanceof SelectOperator) {
+        if (!aggressive) {
+          if (!checkExpressions((SelectOperator) op)) {
+            break;
+          }
+        }
+        continue;
+      }
+
+      if (aggressive) {
+        if (!(op instanceof LimitOperator || op instanceof FilterOperator)) {
           break;
         }
-      } else if (!(op instanceof LimitOperator || (op instanceof FilterOperator && bypassFilter)
-          || (op instanceof SelectOperator && ((SelectOperator) op).getConf().isSelectStar()))) {
+      } else if (!(op instanceof LimitOperator || (op instanceof FilterOperator && bypassFilter))) {
         break;
       }
+
       if (op.getChildOperators() == null || op.getChildOperators().size() != 1) {
         return null;
       }
     }
+
     if (op instanceof FileSinkOperator) {
       fetch.scanOp = ts;
       fetch.fileSink = op;
       return fetch;
     }
+
     return null;
   }
 
+  private boolean checkExpressions(SelectOperator op) {
+    SelectDesc desc = op.getConf();
+    for (ExprNodeDesc expr : desc.getColList()) {
+      if (!checkExpression(expr)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  private boolean checkExpression(ExprNodeDesc expr) {
+    if (expr instanceof ExprNodeConstantDesc || expr instanceof ExprNodeColumnDesc) {
+      return true;
+    }
+
+    if (expr instanceof ExprNodeGenericFuncDesc) {
+      GenericUDF udf = ((ExprNodeGenericFuncDesc) expr).getGenericUDF();
+      if (udf instanceof GenericUDFToBinary || udf instanceof GenericUDFToChar
+          || udf instanceof GenericUDFToDate || udf instanceof GenericUDFToDecimal
+          || udf instanceof GenericUDFToUnixTimeStamp || udf instanceof GenericUDFToUtcTimestamp
+          || udf instanceof GenericUDFToVarchar) {
+        return expr.getChildren().size() == 1 && checkExpression(expr.getChildren().get(0));
+      }
+    }
+    return false;
+  }
+
   private class FetchData {
 
     private final ReadEntity parent;
@@ -240,7 +291,7 @@
private FetchData(ReadEntity parent, Table table, PrunedPartitionList partsList,
       this.splitSample = splitSample;
       this.onlyPruningFilter = bypassFilter;
     }
-    
+
     /*
      * all filters were executed during partition pruning
      */
@@ -251,7 +302,7 @@
public boolean hasOnlyPruningFilter() {
     private FetchWork convertToWork() throws HiveException {
       inputs.clear();
       if (!table.isPartitioned()) {
-        inputs.add(new ReadEntity(table, parent));
+        inputs.add(new ReadEntity(table, parent, parent == null));
         FetchWork work = new FetchWork(table.getPath(), Utilities.getTableDesc(table));
         PlanUtils.configureInputJobPropertiesForStorageHandler(work.getTblDesc());
         work.setSplitSample(splitSample);
@@ -261,12 +312,12 @@
private FetchWork convertToWork() throws HiveException {
       List<PartitionDesc> partP = new ArrayList<PartitionDesc>();
 
       for (Partition partition : partsList.getNotDeniedPartns()) {
-        inputs.add(new ReadEntity(partition, parent));
+        inputs.add(new ReadEntity(partition, parent, parent == null));
         listP.add(partition.getDataLocation());
         partP.add(Utilities.getPartitionDesc(partition));
       }
       Table sourceTable = partsList.getSourceTable();
-      inputs.add(new ReadEntity(sourceTable, parent));
+      inputs.add(new ReadEntity(sourceTable, parent, parent == null));
       TableDesc table = Utilities.getTableDesc(sourceTable);
       FetchWork work = new FetchWork(listP, partP, table);
       if (!work.getPartDesc().isEmpty()) {
